The PGC3 MDD study is including the following analyses to identify genes associated with MDD:


Read in results from all analyses

Show code

library(biomaRt)
library(GenomicRanges)
# Insert nearest gene information
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]

# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
#no_gene_name38 <- which(Genes38$external_gene_name == "")
#Genes38$external_gene_name[no_gene_name38] = Genes38$cpid[no_gene_name38]
#Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]

# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol', 'start_position', 'end_position')], by=c('ensembl_gene_id'), all=TRUE, suffix=c('.37', '.38'))


# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))
##########
# Nearest gene
##########

library(data.table)

# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread(here::here('docs/tables/meta_snps_full_eur.cojo.txt'))

# Link SNPs to nearest gene

window<-50000

for(i in 1:nrow(indep_hits)){
  Genes_i<-Genes[which(Genes$start_position.37 < (indep_hits$BP[i] + window) & Genes$end_position.37 > (indep_hits$BP[i] - window) & Genes$chromosome_name.37 == indep_hits$CHR[i]),]
  if(nrow(Genes_i) != 0){
    gene_string<-NULL
    for(j in 1:nrow(Genes_i)){
      if(indep_hits$BP[i] > Genes_i$start_position.37[j] & indep_hits$BP[i] < Genes_i$end_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=0,
                                                   Pos=NA))
      }
      if(indep_hits$BP[i] < Genes_i$start_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$start_position.37[j]),
                                                   Pos='-'))
      }
      if(indep_hits$BP[i] > Genes_i$end_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$end_position.37[j]),
                                                   Pos='+'))
      }
    }
    gene_string<-gene_string[order(gene_string$Dist),]
    indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
    indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
  } else {
    indep_hits$NearestGene[i]<-NA
    indep_hits$NearestENSG[i]<-NA
  }
}
##########
# SNP-finemapping
##########

# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread(here::here('docs/tables/finemapping/Locus_FineMapping_Full_Results.csv'))

# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])
##########
# FastBAT
##########

# Read in FastBAT results
fastbat<-fread(here::here('docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat'))
##########
# H-MAGMA
##########

hmagma<-fread(here::here('docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv'))

# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]

# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')

hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########

twas<-fread(here::here('docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt'))
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)

# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes[!is.na(Genes$chromosome_name.37),], GRanges(seqnames=chromosome_name.37, ranges=IRanges(start=start_position.37, end=end_position.37)))

twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')

twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]

# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <-  Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]

twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)

twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########

coloc<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv'))
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]

coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# FOCUS 
##########

focus<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv'))

fusion <- fread(here::here("docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt"))
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]

focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')

focus_sig<-focus[focus$FOCUS_pip > 0.5,]

focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# Expression analysis based high confidence genes
##########

expression_highconf_res<-fread(here::here('docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv'))

expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# SMR
##########

# Read in the SMR results
smr_res<-list()

smr_res[['eQTLGen_Blood']]<-fread(here::here('docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv'))

names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'

tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")

for(tissue_i in tissue){
  smr_res[[tissue_i]]<-fread(here::here(paste0('docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv')))
  smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}

smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")

smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########

heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########

# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
  if(i != 6){
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
  } else {
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC'))))
  }
}

pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)

# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread(here::here('docs/tables/pwas/rosmap_banner_pwas_smr_results.csv'))

pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
########
# PsyOPS
########

psyops <- fread(here::here('docs/tables/psyops/psyops_full_eur.cojo.txt'))
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL

psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]

psyops_select <- psyops_genes[with(psyops_genes, which(extreme_pLI | brain_enriched_expression | neurodevelopmental_disorder)),]

psyops_prioritised<-NULL
for(i in unique(psyops_select$lead_variant)){
  tmp<-psyops_select[psyops_select$lead_variant == i,]
  tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
  tmp<-tmp[tmp$distance == min(tmp$distance),]
  psyops_prioritised<-rbind(psyops_prioritised, tmp)
}

Create UpSet plot

This plot will show the number of genes returned by each analysis and the overlap of each analysis

Show code

# Create data.frame listing genes with T/F indicating whether it was supported by each analysis 
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated  & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$Pvalue < 2e-6]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 3.73e-6])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id

library(UpSetR)

png(here::here('docs/figures/gene_consensus_upset_dense.png'), units = 'px', res=300, height=1500, width=2500)

upset(fromList(gene_overlap), nsets=10, order.by = "freq")

dev.off()
## png 
##   2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]

png(here::here('docs/figures/gene_consensus_upset_highconf.png'), units = 'px', res=300, height=1400, width=1500)

upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")

dev.off()
## png 
##   2

Show UpSet plot of genes across all analyses

High-confidence genes

Show UpSet plot of genes across high-confidence analyses

High-confidence genes


Compare high confidence genes across all analyses

Show code

# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)

# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
    if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
      high_conf_tab$finemap[i]<-'Sig'
    } else {
      high_conf_tab$finemap[i]<-'Non-Sig'
    }
  }
}

# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ID[i] %in% twas$ID)){
    high_conf_tab$expression[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
      high_conf_tab$expression[i]<-'Non-Sig'
    } else {
      if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
        high_conf_tab$expression[i]<-'Sig'
      } else {
        if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
            high_conf_tab$expression[i]<-'Pos'
        } else {
            high_conf_tab$expression[i]<-'Neg'
        }
      }
    }
  }
}

# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
    high_conf_tab$protein[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
      high_conf_tab$protein[i]<-'Non-Sig'
    } else {
      if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
          high_conf_tab$protein[i]<-'Pos'
      } else {
          high_conf_tab$protein[i]<-'Neg'
      }
    }
  }
}

# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
    if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$Pvalue < 2e-6]){
      high_conf_tab$fastBAT[i]<-'Sig'
    } else {
      high_conf_tab$fastBAT[i]<-'Non-Sig'
    }
  }
}

# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
    if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
      high_conf_tab$hmagma[i]<-'Sig'
    } else {
      high_conf_tab$hmagma[i]<-'Non-Sig'
    }
  }
}

# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
    if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
      high_conf_tab$psyops[i]<-'Sig'
    } else {
      high_conf_tab$psyops[i]<-'Non-Sig'
    }
  }
}

# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'

high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T

high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])

high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID)

names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

library(ggplot2)
library(cowplot)

png(here::here('docs/figures/gene_con_heatmap.png'), units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
  theme_minimal_grid(color = "white") +
  geom_tile(colour = 'black', width=1.5) +
  scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    labs(x ='', y = "", title='', fill='') +
  facet_grid(~ variable) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
dev.off()
## png 
##   2

Show heatmap of high confidence associations

High-confidence genes

  • Significant genes in from expression based analyses that are based on splicing data are indicated as green as direction of effect is challenging to interpret.

Gene table

Make a simple table of gene-by-method associations.

methods_genes <- dplyr::bind_rows(lapply(gene_overlap, function(x) data.frame(ENSID=x)), .id='method')
methods_genes$assoc = TRUE
genes_methods_wide <- 
tidyr::pivot_wider(dplyr::distinct(methods_genes), names_from='method', values_from='assoc', values_fill=FALSE)

genes_methods_gene_names <-
dplyr::left_join(genes_methods_wide,
  dplyr::select(Genes, ENSID=ensembl_gene_id, Gene=external_gene_name,
  Chrom_b37=chromosome_name.37, pos_start_b37=start_position.37, pos_end_b37=end_position.37,
  Chrom_b38=chromosome_name.38, pos_start_b38=start_position.38, pos_end_b38=end_position.38), by='ENSID')

genes_methods_cpid <- dplyr::select(genes_methods_gene_names, ENSID, Gene,
  Chrom_b37, pos_start_b37, pos_end_b37, Chrom_b38, pos_start_b38, pos_end_b38, 
  Nearest_gene=`Nearest Gene`, Fine_mapping=`Fine-mapping`, Expression, Protein, fastBAT, HMAGMA=`H-MAGMA`, PsyOPS)

genes_methods_cpid$chrom <- with(genes_methods_cpid, dplyr::coalesce(Chrom_b37, Chrom_b38))
genes_methods_cpid$chrom <- with(genes_methods_cpid, ifelse(chrom == 'X', yes=23, no=as.numeric(chrom)))

genes_methods_conf <-
dplyr::arrange(
  dplyr::filter(genes_methods_cpid, Nearest_gene | Fine_mapping | Expression | Protein | fastBAT | HMAGMA | PsyOPS),
  chrom, pos_start_b37, pos_start_b38
)

genes_methods_conf$chrom <- NULL
genes_methods_conf$Chrom_b38 <- paste0('chr', genes_methods_conf$Chrom_b38)

genes_methods_conf <-
dplyr::select(genes_methods_conf, ENSID, Gene,
  Nearest_gene, Fine_mapping, Expression, Protein, fastBAT, HMAGMA, PsyOPS,
  ends_with('b37'), ends_with('b38'))

readr::write_tsv(genes_methods_conf, here::here('docs/tables/genes_methods.tsv'), na='')

Compare high confidence genes across expression/protein panels and methods

This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).

Show code

###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL

# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'

twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL

all_func_res<-rbind(all_func_res, twas_tmp)

# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_tmp)

# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)

pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_banner_tmp)

# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR

smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_rosmap_tmp)

# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]

# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]

all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))

all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'

all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))


# Create heatmap
library(ggplot2)

heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
    theme_bw()  +
    geom_tile(aes(fill = Z), width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
    scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    geom_text(aes(label=round(Z,1)), color="black", size=3) +
    labs(title="High confidence genes across panels and methods") +
  facet_wrap(~ Group , nrow=1, scales = "free_x")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
  group_siz<-rbind(group_siz, data.frame(Group=i,
                           Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}

# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop

library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))

for(i in 1:nrow(group_siz)){
  gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]

}

png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)

grid.draw(gt)

a<-dev.off()

Show heatmap of high confidence associations across expression and protein datasets and methods

High-confidence genes

  • Black box indicates significance
  • Green box indicates colocalisation
  • Magenta box indicates FOCUS PIP > 0.5
  • Some MetaBrain Basalganglia, Hippocampus, Spinalcord panels are not present due to not containing any high confidence genes
  • Banner PWAS and ROSMAP SMR results only shown for genes that were signficant in the discovery ROSMAP PWAS.

Query GO terms using PANTHER

Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.

Show code

library(httr)
library(jsonlite)

# the PantherDB URL
panther_db <- "http://pantherdb.org"

# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))

# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]

# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))

# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]

The next step is to query the overrepresentation test.

Show code

# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
    modify_url(url,
        path="/services/oai/pantherdb/enrich/overrep",
        query=list(geneInputList=paste(gene_list, collapse=","),
                   organism=organism_id,
                   annotDataSet=annot_data_set_id,
                   enrichmentTestType="FISHER",
                   correction="FDR"))
}

# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
    # make query
    overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
    # parse JSON
    overrep <- fromJSON(content(overrep_response, "text"))
    return(overrep)
}

high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)

high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)

high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)

Biological processes

Show code

# extract results table from the query
panther_format <- function(query) {
    results <- query$results$result
    results_labels <- results$term
    results_terms <- cbind(results_labels,
                           results[,c('fold_enrichment', 'fdr',
                                      'number_in_list', 'expected', 
                                      'number_in_reference', 'pValue')])
                                      
    return(results_terms)
}

high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)

Show Go: Biological Processes table

# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
GO: Biological Processes
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0007399 nervous system development 2.7177826 0.0000000 59 21.7088737 2191 0.0000000
GO:0007275 multicellular organism development 1.8858068 0.0000249 79 41.8918840 4228 0.0000000
GO:0048731 system development 1.9459507 0.0000206 74 38.0276847 3838 0.0000000
GO:0048856 anatomical structure development 1.7069601 0.0002165 87 50.9677983 5144 0.0000001
GO:0032502 developmental process 1.6355884 0.0003225 92 56.2488708 5677 0.0000001
GO:0007417 central nervous system development 2.9282342 0.0004811 30 10.2450823 1034 0.0000002
GO:0022008 neurogenesis 2.5818399 0.0015368 33 12.7815824 1290 0.0000007
GO:0065008 regulation of biological quality 1.8115711 0.0015966 66 36.4324639 3677 0.0000008
GO:0032501 multicellular organismal process 1.5182678 0.0019559 99 65.2058866 6581 0.0000011
GO:0007420 brain development 3.1254649 0.0018301 24 7.6788576 775 0.0000012
GO:0050807 regulation of synapse organization 5.7672269 0.0029989 12 2.0807227 210 0.0000021
GO:0099537 trans-synaptic signaling 3.9808585 0.0029142 17 4.2704357 431 0.0000022
GO:0007416 synapse assembly 8.4105392 0.0028188 9 1.0700860 108 0.0000023
GO:0048523 negative regulation of cellular process 1.6422946 0.0026664 77 46.8856185 4732 0.0000024
GO:0120035 regulation of plasma membrane bounded cell projection organization 3.3116498 0.0024947 21 6.3412502 640 0.0000024
GO:0050803 regulation of synapse structure or activity 5.6070261 0.0027195 12 2.1401719 216 0.0000028
GO:0050808 synapse organization 4.6942544 0.0027578 14 2.9823692 301 0.0000030
GO:0060322 head development 2.9431778 0.0027913 24 8.1544514 823 0.0000032
GO:0031344 regulation of cell projection organization 3.2308779 0.0028593 21 6.4997814 656 0.0000035
GO:0048699 generation of neurons 2.5810120 0.0027974 29 11.2359027 1134 0.0000036
GO:0009653 anatomical structure morphogenesis 2.0302598 0.0032861 45 22.1646510 2237 0.0000044
GO:0032879 regulation of localization 2.0626608 0.0032979 43 20.8468600 2104 0.0000046
GO:0099536 synaptic signaling 3.7218004 0.0036071 17 4.5676818 461 0.0000053
GO:0048812 neuron projection morphogenesis 3.5819415 0.0056113 17 4.7460294 479 0.0000086
GO:0120039 plasma membrane bounded cell projection morphogenesis 3.5449380 0.0061371 17 4.7955705 484 0.0000098
GO:0034762 regulation of transmembrane transport 3.2337318 0.0061707 19 5.8755646 593 0.0000102
GO:0048858 cell projection morphogenesis 3.5086912 0.0064625 17 4.8451115 489 0.0000111
GO:0000902 cell morphogenesis 2.9725889 0.0066603 21 7.0645490 713 0.0000119
GO:0048513 animal organ development 1.7679191 0.0064534 57 32.2412939 3254 0.0000119
GO:0010975 regulation of neuron projection development 3.6288169 0.0071358 16 4.4091505 445 0.0000137
GO:0031346 positive regulation of cell projection organization 4.0255572 0.0080729 14 3.4777794 351 0.0000160
GO:0048519 negative regulation of biological process 1.5383975 0.0086864 81 52.6521929 5314 0.0000177
GO:0032990 cell part morphogenesis 3.3774606 0.0084894 17 5.0333673 508 0.0000179
GO:0034765 regulation of ion transmembrane transport 3.3642157 0.0086489 17 5.0531837 510 0.0000188
GO:0098742 cell-cell adhesion via plasma-membrane adhesion molecules 4.5360212 0.0095777 12 2.6454903 267 0.0000214
GO:0007268 chemical synaptic transmission 3.6567562 0.0102105 15 4.1019962 414 0.0000234
GO:0098916 anterograde trans-synaptic signaling 3.6567562 0.0099346 15 4.1019962 414 0.0000234
GO:0050890 cognition 4.1389404 0.0100762 13 3.1409005 317 0.0000244
GO:0051252 regulation of RNA metabolic process 1.6677615 0.0101049 62 37.1755792 3752 0.0000251
GO:0043269 regulation of ion transport 2.8510302 0.0134991 20 7.0150080 708 0.0000344
GO:0010976 positive regulation of neuron projection development 5.8602467 0.0137779 9 1.5357715 155 0.0000360
GO:0098609 cell-cell adhesion 3.1597606 0.0150420 17 5.3801545 543 0.0000403
GO:0030182 neuron differentiation 2.4639326 0.0153139 26 10.5522366 1065 0.0000420
GO:0051049 regulation of transport 2.0573912 0.0153786 36 17.4978872 1766 0.0000432
GO:0050804 modulation of chemical synaptic transmission 3.4563860 0.0152459 15 4.3397931 438 0.0000438
GO:0019219 regulation of nucleobase-containing compound metabolic process 1.6130368 0.0149637 65 40.2966633 4067 0.0000439
GO:0099177 regulation of trans-synaptic signaling 3.4485127 0.0149671 15 4.3497013 439 0.0000449
GO:0007610 behavior 2.9978160 0.0150994 18 6.0043713 606 0.0000462
GO:0034330 cell junction organization 3.2426175 0.0162541 16 4.9342853 498 0.0000508
GO:0032409 regulation of transporter activity 4.0505607 0.0192586 12 2.9625528 299 0.0000614
GO:0010468 regulation of gene expression 1.5383231 0.0190754 74 48.1043276 4855 0.0000620
GO:0007156 homophilic cell adhesion via plasma membrane adhesion molecules 5.3431661 0.0214559 9 1.6843946 170 0.0000712
GO:1900244 positive regulation of synaptic vesicle endocytosis 50.4632353 0.0221424 3 0.0594492 6 0.0000748
GO:0050793 regulation of developmental process 1.8247052 0.0248940 45 24.6615183 2489 0.0000857
GO:0001764 neuron migration 5.8087177 0.0296578 8 1.3772403 139 0.0001040
GO:0032412 regulation of ion transmembrane transporter activity 4.0815852 0.0326857 11 2.6950313 272 0.0001167
GO:0048666 neuron development 2.5141825 0.0343739 21 8.3526155 843 0.0001249
GO:0007611 learning or memory 4.0370588 0.0346216 11 2.7247559 275 0.0001281
GO:0050794 regulation of cellular process 1.2450034 0.0342498 138 110.8430715 11187 0.0001289
GO:2001257 regulation of cation channel activity 4.9099364 0.0343715 9 1.8330176 185 0.0001315
GO:0034329 cell junction assembly 4.0224318 0.0339417 11 2.7346641 276 0.0001320
GO:0032989 cellular component morphogenesis 2.8406457 0.0358634 17 5.9845549 604 0.0001418
GO:1903423 positive regulation of synaptic vesicle recycling 37.8474265 0.0360630 3 0.0792656 8 0.0001449
NA UNCLASSIFIED 0.3703724 0.0371666 10 26.9998543 2725 0.0001517
GO:0008150 biological_process 1.0960443 0.0365948 194 177.0001457 17864 0.0001517
GO:0022898 regulation of transmembrane transporter activity 3.9368481 0.0375844 11 2.7941134 282 0.0001582
GO:0048667 cell morphogenesis involved in neuron differentiation 3.2112968 0.0388950 14 4.3596095 440 0.0001662
GO:0007612 learning 5.2772011 0.0449656 8 1.5159551 153 0.0001950
GO:0048522 positive regulation of cellular process 1.4418067 0.0455058 81 56.1795133 5670 0.0002002
GO:2000311 regulation of AMPA receptor activity 15.5271493 0.0452179 4 0.2576133 26 0.0002019
GO:0050789 regulation of biological process 1.2224704 0.0454834 143 116.9762495 11806 0.0002059
GO:0044057 regulation of system process 2.8429992 0.0481551 16 5.6278595 568 0.0002211

Show Go: Molecular functions table

# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
GO: Molecular Functions
id label fold_enrichment fdr number_in_list expected number_in_reference pValue

Show Go: Cellular Components table

# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
GO: Cellular Components
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0043005 neuron projection 3.620031 0.0000000 50 13.8120356 1394 0.0000000
GO:0045202 synapse 3.562111 0.0000000 48 13.4751566 1360 0.0000000
GO:0036477 somatodendritic compartment 4.102702 0.0000000 35 8.5309631 861 0.0000000
GO:0030425 dendrite 4.813663 0.0000000 30 6.2322599 629 0.0000000
GO:0097447 dendritic tree 4.798406 0.0000000 30 6.2520764 631 0.0000000
GO:0098794 postsynapse 4.514283 0.0000000 28 6.2025353 626 0.0000000
GO:0030054 cell junction 2.551512 0.0000000 54 21.1639225 2136 0.0000000
GO:0030424 axon 4.354301 0.0000000 28 6.4304240 649 0.0000000
GO:0120025 plasma membrane bounded cell projection 2.432496 0.0000001 55 22.6105202 2282 0.0000000
GO:0042995 cell projection 2.363815 0.0000002 56 23.6905144 2391 0.0000000
GO:0098984 neuron to neuron synapse 5.371437 0.0000011 19 3.5372286 357 0.0000000
GO:0098793 presynapse 4.213249 0.0000043 22 5.2216232 527 0.0000000
GO:0030426 growth cone 7.763575 0.0000049 13 1.6744864 169 0.0000000
GO:0030427 site of polarized growth 7.497395 0.0000066 13 1.7339356 175 0.0000000
GO:0014069 postsynaptic density 5.263037 0.0000073 17 3.2300743 326 0.0000001
GO:0032279 asymmetric synapse 5.167922 0.0000087 17 3.2895235 332 0.0000001
GO:0099572 postsynaptic specialization 4.944524 0.0000150 17 3.4381466 347 0.0000001
GO:0098978 glutamatergic synapse 4.806022 0.0000485 16 3.3291563 336 0.0000004
GO:0097060 synaptic membrane 4.479765 0.0000508 17 3.7948419 383 0.0000005
GO:0150034 distal axon 5.028365 0.0001418 14 2.7842052 281 0.0000014
GO:0099240 intrinsic component of synaptic membrane 6.569179 0.0001675 11 1.6744864 169 0.0000017
GO:0043197 dendritic spine 6.307904 0.0002319 11 1.7438438 176 0.0000025
GO:0044309 neuron spine 6.237029 0.0002459 11 1.7636602 178 0.0000028
GO:0045211 postsynaptic membrane 4.806022 0.0004540 13 2.7049395 273 0.0000053
GO:0031224 intrinsic component of membrane 1.524569 0.0004682 90 59.0330759 5958 0.0000057
GO:0098839 postsynaptic density membrane 8.872657 0.0004689 8 0.9016465 91 0.0000060
GO:0099699 integral component of synaptic membrane 6.428438 0.0004657 10 1.5555879 157 0.0000062
GO:0099634 postsynaptic specialization membrane 6.900955 0.0024073 8 1.1592598 117 0.0000330
GO:0070161 anchoring junction 2.266313 0.0029447 30 13.2373598 1336 0.0000418
GO:0098797 plasma membrane protein complex 2.780343 0.0032956 20 7.1933557 726 0.0000484
GO:0043025 neuronal cell body 3.197670 0.0039264 16 5.0036427 505 0.0000596
GO:0016021 integral component of membrane 1.461946 0.0053496 84 57.4576716 5799 0.0000838
GO:0005891 voltage-gated calcium channel complex 11.214052 0.0079306 5 0.4458692 45 0.0001281
GO:0044295 axonal growth cone 16.148235 0.0105927 4 0.2477051 25 0.0001763
GO:0098590 plasma membrane region 2.167872 0.0105836 27 12.4546117 1257 0.0001813
GO:0099061 integral component of postsynaptic density membrane 9.704468 0.0135905 5 0.5152266 52 0.0002395
GO:0044297 cell body 2.813281 0.0136859 16 5.6873088 574 0.0002479
GO:0099055 integral component of postsynaptic membrane 5.936851 0.0133777 7 1.1790762 119 0.0002488
GO:0034702 ion channel complex 3.688343 0.0142453 11 2.9823692 301 0.0002719
GO:0099146 intrinsic component of postsynaptic density membrane 9.011292 0.0168366 5 0.5548594 56 0.0003296
GO:0098936 intrinsic component of postsynaptic membrane 5.651882 0.0164735 7 1.2385254 125 0.0003306
GO:0120111 neuron projection cytoplasm 6.728431 0.0179491 6 0.8917383 90 0.0003690
GO:0005794 Golgi apparatus 1.946743 0.0191728 32 16.4377095 1659 0.0004035
GO:0005798 Golgi-associated vesicle 6.582161 0.0191416 6 0.9115547 92 0.0004123
GO:0005886 plasma membrane 1.401522 0.0208750 83 59.2213318 5977 0.0004598
GO:0034703 cation channel complex 4.055081 0.0226139 9 2.2194376 224 0.0005092
GO:0000785 chromatin 2.053277 0.0279554 26 12.6626840 1278 0.0006431
GO:0043198 dendritic shaft 10.623839 0.0320100 4 0.3765117 38 0.0007521
GO:0071944 cell periphery 1.359226 0.0340637 87 64.0069940 6460 0.0008170
GO:0034704 calcium channel complex 7.107498 0.0371501 5 0.7034824 71 0.0009092